import pandas as pd
import numpy as np
import plotly.express as plx
from plotly.subplots import make_subplots
import plotly.graph_objects as go
# Load the ENB2012 energy-efficiency dataset (768 samples; features X1-X8,
# targets Y1 = heating load, Y2 = cooling load).
# pd.read_excel already returns a DataFrame, so the extra pd.DataFrame()
# wrapper in the original was redundant (it copied the frame for no benefit).
df = pd.read_excel('C:\\Users\\harip\\INEURON_PROJECTS\\Energy Efficiency\\energy+efficiency\\ENB2012_data.xlsx')
df
| X1 | X2 | X3 | X4 | X5 | X6 | X7 | X8 | Y1 | Y2 | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.98 | 514.5 | 294.0 | 110.25 | 7.0 | 2 | 0.0 | 0 | 15.55 | 21.33 |
| 1 | 0.98 | 514.5 | 294.0 | 110.25 | 7.0 | 3 | 0.0 | 0 | 15.55 | 21.33 |
| 2 | 0.98 | 514.5 | 294.0 | 110.25 | 7.0 | 4 | 0.0 | 0 | 15.55 | 21.33 |
| 3 | 0.98 | 514.5 | 294.0 | 110.25 | 7.0 | 5 | 0.0 | 0 | 15.55 | 21.33 |
| 4 | 0.90 | 563.5 | 318.5 | 122.50 | 7.0 | 2 | 0.0 | 0 | 20.84 | 28.28 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 763 | 0.64 | 784.0 | 343.0 | 220.50 | 3.5 | 5 | 0.4 | 5 | 17.88 | 21.40 |
| 764 | 0.62 | 808.5 | 367.5 | 220.50 | 3.5 | 2 | 0.4 | 5 | 16.54 | 16.88 |
| 765 | 0.62 | 808.5 | 367.5 | 220.50 | 3.5 | 3 | 0.4 | 5 | 16.44 | 17.11 |
| 766 | 0.62 | 808.5 | 367.5 | 220.50 | 3.5 | 4 | 0.4 | 5 | 16.48 | 16.61 |
| 767 | 0.62 | 808.5 | 367.5 | 220.50 | 3.5 | 5 | 0.4 | 5 | 16.64 | 16.03 |
768 rows × 10 columns
# Count missing values per column — the dataset is complete (all zeros below).
df.isnull().sum()
X1 0 X2 0 X3 0 X4 0 X5 0 X6 0 X7 0 X8 0 Y1 0 Y2 0 dtype: int64
# Column dtypes, non-null counts and memory footprint.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 768 entries, 0 to 767 Data columns (total 10 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 X1 768 non-null float64 1 X2 768 non-null float64 2 X3 768 non-null float64 3 X4 768 non-null float64 4 X5 768 non-null float64 5 X6 768 non-null int64 6 X7 768 non-null float64 7 X8 768 non-null int64 8 Y1 768 non-null float64 9 Y2 768 non-null float64 dtypes: float64(8), int64(2) memory usage: 60.1 KB
# Summary statistics for every feature and both targets.
df.describe()
| X1 | X2 | X3 | X4 | X5 | X6 | X7 | X8 | Y1 | Y2 | |
|---|---|---|---|---|---|---|---|---|---|---|
| count | 768.000000 | 768.000000 | 768.000000 | 768.000000 | 768.00000 | 768.000000 | 768.000000 | 768.00000 | 768.000000 | 768.000000 |
| mean | 0.764167 | 671.708333 | 318.500000 | 176.604167 | 5.25000 | 3.500000 | 0.234375 | 2.81250 | 22.307195 | 24.587760 |
| std | 0.105777 | 88.086116 | 43.626481 | 45.165950 | 1.75114 | 1.118763 | 0.133221 | 1.55096 | 10.090204 | 9.513306 |
| min | 0.620000 | 514.500000 | 245.000000 | 110.250000 | 3.50000 | 2.000000 | 0.000000 | 0.00000 | 6.010000 | 10.900000 |
| 25% | 0.682500 | 606.375000 | 294.000000 | 140.875000 | 3.50000 | 2.750000 | 0.100000 | 1.75000 | 12.992500 | 15.620000 |
| 50% | 0.750000 | 673.750000 | 318.500000 | 183.750000 | 5.25000 | 3.500000 | 0.250000 | 3.00000 | 18.950000 | 22.080000 |
| 75% | 0.830000 | 741.125000 | 343.000000 | 220.500000 | 7.00000 | 4.250000 | 0.400000 | 4.00000 | 31.667500 | 33.132500 |
| max | 0.980000 | 808.500000 | 416.500000 | 220.500000 | 7.00000 | 5.000000 | 0.400000 | 5.00000 | 43.100000 | 48.030000 |
# Full correlation heatmap of features and targets.
plx.imshow(df.corr(),height=750,width=750,text_auto=True)
# X6 correlations: essentially zero against everything (values ~1e-17 below),
# which motivates dropping it later.
df.corr()['X6']
X1 4.678592e-17 X2 -3.459372e-17 X3 -2.429499e-17 X4 -5.830058e-17 X5 4.492205e-17 X6 1.000000e+00 X7 -9.406007e-16 X8 -2.549352e-16 Y1 -2.586763e-03 Y2 1.428960e-02 Name: X6, dtype: float64
# X6 takes four equally frequent values (2, 3, 4, 5 — 192 rows each).
df['X6'].value_counts()
2 192 3 192 4 192 5 192 Name: X6, dtype: int64
# X8 takes values 0-5; category 0 is rare (48 rows vs 144 for each of 1-5).
df['X8'].value_counts()
1 144 2 144 3 144 4 144 5 144 0 48 Name: X8, dtype: int64
# Distribution of each target across the X8 categories.
plx.box(x = df['X8'],y=df['Y1'],color=df['X8'])
plx.box(x = df['X8'],y=df['Y2'],color=df['X8'])
plx.imshow(df.corr(),height=750,width=750,text_auto=True)
# Drop X6: its correlation with both targets is near zero (see df.corr()['X6']).
df.drop(['X6'],axis=1,inplace=True)
df
| X1 | X2 | X3 | X4 | X5 | X7 | X8 | Y1 | Y2 | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.98 | 514.5 | 294.0 | 110.25 | 7.0 | 0.0 | 0 | 15.55 | 21.33 |
| 1 | 0.98 | 514.5 | 294.0 | 110.25 | 7.0 | 0.0 | 0 | 15.55 | 21.33 |
| 2 | 0.98 | 514.5 | 294.0 | 110.25 | 7.0 | 0.0 | 0 | 15.55 | 21.33 |
| 3 | 0.98 | 514.5 | 294.0 | 110.25 | 7.0 | 0.0 | 0 | 15.55 | 21.33 |
| 4 | 0.90 | 563.5 | 318.5 | 122.50 | 7.0 | 0.0 | 0 | 20.84 | 28.28 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 763 | 0.64 | 784.0 | 343.0 | 220.50 | 3.5 | 0.4 | 5 | 17.88 | 21.40 |
| 764 | 0.62 | 808.5 | 367.5 | 220.50 | 3.5 | 0.4 | 5 | 16.54 | 16.88 |
| 765 | 0.62 | 808.5 | 367.5 | 220.50 | 3.5 | 0.4 | 5 | 16.44 | 17.11 |
| 766 | 0.62 | 808.5 | 367.5 | 220.50 | 3.5 | 0.4 | 5 | 16.48 | 16.61 |
| 767 | 0.62 | 808.5 | 367.5 | 220.50 | 3.5 | 0.4 | 5 | 16.64 | 16.03 |
768 rows × 9 columns
# Drop X1 as well.
# NOTE(review): the rationale is not stated here — presumably X1 is redundant
# with the other geometry features; confirm against the correlation heatmap.
df.drop(['X1'],axis=1,inplace=True)
df
| X2 | X3 | X4 | X5 | X7 | X8 | Y1 | Y2 | |
|---|---|---|---|---|---|---|---|---|
| 0 | 514.5 | 294.0 | 110.25 | 7.0 | 0.0 | 0 | 15.55 | 21.33 |
| 1 | 514.5 | 294.0 | 110.25 | 7.0 | 0.0 | 0 | 15.55 | 21.33 |
| 2 | 514.5 | 294.0 | 110.25 | 7.0 | 0.0 | 0 | 15.55 | 21.33 |
| 3 | 514.5 | 294.0 | 110.25 | 7.0 | 0.0 | 0 | 15.55 | 21.33 |
| 4 | 563.5 | 318.5 | 122.50 | 7.0 | 0.0 | 0 | 20.84 | 28.28 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 763 | 784.0 | 343.0 | 220.50 | 3.5 | 0.4 | 5 | 17.88 | 21.40 |
| 764 | 808.5 | 367.5 | 220.50 | 3.5 | 0.4 | 5 | 16.54 | 16.88 |
| 765 | 808.5 | 367.5 | 220.50 | 3.5 | 0.4 | 5 | 16.44 | 17.11 |
| 766 | 808.5 | 367.5 | 220.50 | 3.5 | 0.4 | 5 | 16.48 | 16.61 |
| 767 | 808.5 | 367.5 | 220.50 | 3.5 | 0.4 | 5 | 16.64 | 16.03 |
768 rows × 8 columns
# Binarize X8: collapse categories 1-5 into a single "present" flag,
# leaving 0 = absent, 1 = any non-zero category.
df.loc[(df['X8']>0), 'X8'] = 1
df
| X2 | X3 | X4 | X5 | X7 | X8 | Y1 | Y2 | |
|---|---|---|---|---|---|---|---|---|
| 0 | 514.5 | 294.0 | 110.25 | 7.0 | 0.0 | 0 | 15.55 | 21.33 |
| 1 | 514.5 | 294.0 | 110.25 | 7.0 | 0.0 | 0 | 15.55 | 21.33 |
| 2 | 514.5 | 294.0 | 110.25 | 7.0 | 0.0 | 0 | 15.55 | 21.33 |
| 3 | 514.5 | 294.0 | 110.25 | 7.0 | 0.0 | 0 | 15.55 | 21.33 |
| 4 | 563.5 | 318.5 | 122.50 | 7.0 | 0.0 | 0 | 20.84 | 28.28 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 763 | 784.0 | 343.0 | 220.50 | 3.5 | 0.4 | 1 | 17.88 | 21.40 |
| 764 | 808.5 | 367.5 | 220.50 | 3.5 | 0.4 | 1 | 16.54 | 16.88 |
| 765 | 808.5 | 367.5 | 220.50 | 3.5 | 0.4 | 1 | 16.44 | 17.11 |
| 766 | 808.5 | 367.5 | 220.50 | 3.5 | 0.4 | 1 | 16.48 | 16.61 |
| 767 | 808.5 | 367.5 | 220.50 | 3.5 | 0.4 | 1 | 16.64 | 16.03 |
768 rows × 8 columns
# Re-check correlations after dropping X6/X1 and binarizing X8.
plx.imshow(df.corr(),height=750,width=750,text_auto=True)
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 768 entries, 0 to 767 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 X2 768 non-null float64 1 X3 768 non-null float64 2 X4 768 non-null float64 3 X5 768 non-null float64 4 X7 768 non-null float64 5 X8 768 non-null int64 6 Y1 768 non-null float64 7 Y2 768 non-null float64 dtypes: float64(7), int64(1) memory usage: 48.1 KB
# Target distributions across the binarized X8 flag.
plx.box(x = df['X8'],y=df['Y1'],color=df['X8'])
# BUG FIX: the second plot was a copy-paste duplicate of Y1; it should show
# Y2, mirroring the earlier pair of box plots.
plx.box(x = df['X8'],y=df['Y2'],color=df['X8'])
# Modelling imports: linear, ensemble, kernel and gradient-boosting regressors,
# plus a multi-output wrapper for the estimators that are single-output only.
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import SGDRegressor
from sklearn.linear_model import ElasticNet
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import r2_score
from tqdm import tqdm
# Features = remaining X columns; multi-output target = [Y1, Y2].
X = df.drop(['Y1','Y2'],axis=1)
Y = df.drop(['X2','X3','X4','X5','X7','X8'],axis=1)
# Per-model accumulators for train/test R² collected over repeated splits.
lr_trn_score,rfr_trn_score,sgd_trn_score,en_trn_score,abr_trn_score,gbr_trn_score,svr_trn_score,xgb_trn_score,cbr_trn_score = [],[],[],[],[],[],[],[],[]
lr_test_score,rfr_test_score,sgd_test_score,en_test_score,abr_test_score,gbr_test_score,svr_test_score,xgb_test_score,cbr_test_score = [],[],[],[],[],[],[],[],[]
# Benchmark nine regressors on the multi-output target (Y1, Y2) across 1000
# random 80/20 splits, recording train and test R² for each split.
for i in tqdm(range(1000)):
    x_train, x_test, y_train, y_test = train_test_split(X,Y,test_size=0.2)
    lr = LinearRegression().fit(x_train, y_train)
    pred = lr.predict(x_test)
    pred_trn = lr.predict(x_train)
    lr_test_score.append(r2_score(y_test, pred))
    lr_trn_score.append(r2_score(y_train, pred_trn))
    # SGD / AdaBoost / GBR / SVR / XGB / CatBoost are single-output estimators,
    # so they are wrapped in MultiOutputRegressor (one model per target).
    sgd = MultiOutputRegressor(SGDRegressor()).fit(x_train,y_train)
    pred = sgd.predict(x_test)
    pred_trn = sgd.predict(x_train)
    sgd_test_score.append(r2_score(y_test, pred))
    sgd_trn_score.append(r2_score(y_train, pred_trn))
    en = ElasticNet().fit(x_train,y_train)
    pred = en.predict(x_test)
    pred_trn = en.predict(x_train)
    en_test_score.append(r2_score(y_test, pred))
    en_trn_score.append(r2_score(y_train, pred_trn))
    abr = MultiOutputRegressor(AdaBoostRegressor()).fit(x_train,y_train)
    pred = abr.predict(x_test)
    pred_trn = abr.predict(x_train)
    abr_test_score.append(r2_score(y_test, pred))
    abr_trn_score.append(r2_score(y_train, pred_trn))
    gbr = MultiOutputRegressor(GradientBoostingRegressor()).fit(x_train,y_train)
    pred = gbr.predict(x_test)
    pred_trn = gbr.predict(x_train)
    gbr_test_score.append(r2_score(y_test, pred))
    gbr_trn_score.append(r2_score(y_train, pred_trn))
    svr = MultiOutputRegressor(SVR()).fit(x_train,y_train)
    pred = svr.predict(x_test)
    pred_trn = svr.predict(x_train)
    svr_test_score.append(r2_score(y_test, pred))
    svr_trn_score.append(r2_score(y_train, pred_trn))
    xgb = MultiOutputRegressor(XGBRegressor()).fit(x_train,y_train)
    pred = xgb.predict(x_test)
    pred_trn = xgb.predict(x_train)
    xgb_test_score.append(r2_score(y_test, pred))
    xgb_trn_score.append(r2_score(y_train, pred_trn))
    cbr = MultiOutputRegressor(CatBoostRegressor(verbose=0)).fit(x_train,y_train)
    pred = cbr.predict(x_test)
    pred_trn = cbr.predict(x_train)
    cbr_test_score.append(r2_score(y_test, pred))
    cbr_trn_score.append(r2_score(y_train, pred_trn))
    rfr = RandomForestRegressor().fit(x_train, y_train)
    pred = rfr.predict(x_test)
    # BUG FIX: this was lr.predict(x_train), so rfr_trn_score recorded
    # LinearRegression's train R² instead of the random forest's.
    pred_trn = rfr.predict(x_train)
    rfr_test_score.append(r2_score(y_test, pred))
    rfr_trn_score.append(r2_score(y_train, pred_trn))
100%|██████████| 1000/1000 [30:07<00:00, 1.81s/it]
def _plot_train_test(test_scores, train_scores, model_label):
    """Plot per-split test R² (top panel) and train R² (bottom panel)."""
    fig = make_subplots(rows=2, cols=1)
    fig.append_trace(go.Scatter(y=test_scores, name='Test Score'), row=1, col=1)
    fig.append_trace(go.Scatter(y=train_scores, name='Train Score'), row=2, col=1)
    fig.update_layout(title='Train vs Test Score on ' + model_label)
    fig.show()

# One figure per benchmarked model, in the same order as before.
_plot_train_test(lr_test_score, lr_trn_score, 'Linear Regression')
_plot_train_test(sgd_test_score, sgd_trn_score, 'SGDRegressor')
_plot_train_test(en_test_score, en_trn_score, 'ElasticNet Regression')
_plot_train_test(abr_test_score, abr_trn_score, 'AdaBoostRegressor')
_plot_train_test(gbr_test_score, gbr_trn_score, 'GradientBoostingRegressor')
_plot_train_test(svr_test_score, svr_trn_score, 'Support Vector Regressor')
_plot_train_test(xgb_test_score, xgb_trn_score, 'XGBRegressor')
_plot_train_test(cbr_test_score, cbr_trn_score, 'CatBoostRegressor')
_plot_train_test(rfr_test_score, rfr_trn_score, 'RandomForestRegressor')
# Train the chosen model (CatBoost, one per target via MultiOutputRegressor)
# with a large tree budget and early stopping, on a fresh 80/20 split.
# NOTE(review): early_stopping_rounds is passed but no eval_set is supplied
# to fit() — presumably the early stopping is inert here; verify.
X = df.drop(['Y1','Y2'],axis=1)
Y = df.drop(['X2','X3','X4','X5','X7','X8'],axis=1)
x_train, x_test, y_train, y_test = train_test_split(X,Y,test_size=0.2)
cbr = MultiOutputRegressor(CatBoostRegressor(verbose=0, n_estimators=10000,early_stopping_rounds=100)).fit(x_train,y_train)
pred = cbr.predict(x_test)
pred_trn = cbr.predict(x_train)
print(r2_score(y_train, pred_trn), r2_score(y_test, pred))
0.9854413978366589 0.9814623383584027
# Split the two-column multi-output predictions into per-target lists
# (column 0 = Y1, column 1 = Y2). Comprehensions replace the manual
# index-based append loop.
y1_pred = [p[0] for p in pred]
y2_pred = [p[1] for p in pred]
def visulaize_performance_of_the_model(pred, y_test, modelname):
    """Scatter predicted vs actual values with a y = x reference line.

    Points on the line are perfect predictions; vertical distance from it
    is the prediction error.

    pred      : 1-D sequence of predicted target values.
    y_test    : matching sequence of actual target values.
    modelname : label used in the figure title.
    """
    # Fit the reference line to the actual data range instead of the
    # previous hard-coded 0..50 span, so the plot stays correct for
    # targets outside that window.
    lo = float(min(np.min(pred), np.min(y_test)))
    hi = float(max(np.max(pred), np.max(y_test)))
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=[lo, hi], y=[lo, hi],
                             mode='lines',
                             name='perfectline'))
    fig.add_trace(go.Scatter(x=pred, y=y_test,
                             mode='markers',
                             name='predictions'))
    fig.update_layout(
        title=f"Performance of {modelname} on Test data",
        xaxis_title="Predicted",
        yaxis_title="Actual",
        font=dict(
            family="Courier New, monospace",
            size=13,
            color="RebeccaPurple"
        )
    )
    fig.show()
# Predicted-vs-actual scatter for each target of the multi-output model.
visulaize_performance_of_the_model(y1_pred, y_test['Y1'], 'CatBoost regressor')
visulaize_performance_of_the_model(y2_pred, y_test['Y2'], 'CatBoost regressor')
# Repeat the benchmark with a single target: Y1 (heating load).
X = df.drop(['Y1','Y2'],axis=1)
Y = df['Y1']
# Reset the per-model train/test R² accumulators (six models this round).
lr_trn_score,rfr_trn_score,abr_trn_score,gbr_trn_score,xgb_trn_score,cbr_trn_score = [],[],[],[],[],[]
lr_test_score,rfr_test_score,abr_test_score,gbr_test_score,xgb_test_score,cbr_test_score = [],[],[],[],[],[]
# Benchmark six regressors on Y1 across 1000 random 80/20 splits,
# recording train and test R² per split.
for i in tqdm(range(1000)):
    x_train, x_test, y_train, y_test = train_test_split(X,Y,test_size=0.2)
    lr = LinearRegression().fit(x_train, y_train)
    pred = lr.predict(x_test)
    pred_trn = lr.predict(x_train)
    lr_test_score.append(r2_score(y_test, pred))
    lr_trn_score.append(r2_score(y_train, pred_trn))
    abr = AdaBoostRegressor().fit(x_train,y_train)
    pred = abr.predict(x_test)
    pred_trn = abr.predict(x_train)
    abr_test_score.append(r2_score(y_test, pred))
    abr_trn_score.append(r2_score(y_train, pred_trn))
    gbr = GradientBoostingRegressor().fit(x_train,y_train)
    pred = gbr.predict(x_test)
    pred_trn = gbr.predict(x_train)
    gbr_test_score.append(r2_score(y_test, pred))
    gbr_trn_score.append(r2_score(y_train, pred_trn))
    xgb = XGBRegressor().fit(x_train,y_train)
    pred = xgb.predict(x_test)
    pred_trn = xgb.predict(x_train)
    xgb_test_score.append(r2_score(y_test, pred))
    xgb_trn_score.append(r2_score(y_train, pred_trn))
    cbr = CatBoostRegressor(verbose=0).fit(x_train,y_train)
    pred = cbr.predict(x_test)
    pred_trn = cbr.predict(x_train)
    cbr_test_score.append(r2_score(y_test, pred))
    cbr_trn_score.append(r2_score(y_train, pred_trn))
    rfr = RandomForestRegressor().fit(x_train, y_train)
    pred = rfr.predict(x_test)
    # BUG FIX: this was lr.predict(x_train) — the random forest's train
    # score was actually LinearRegression's.
    pred_trn = rfr.predict(x_train)
    rfr_test_score.append(r2_score(y_test, pred))
    rfr_trn_score.append(r2_score(y_train, pred_trn))
100%|██████████| 1000/1000 [14:46<00:00, 1.13it/s]
def _plot_train_test(test_scores, train_scores, model_label):
    """Plot per-split test R² (top panel) and train R² (bottom panel)."""
    fig = make_subplots(rows=2, cols=1)
    fig.append_trace(go.Scatter(y=test_scores, name='Test Score'), row=1, col=1)
    fig.append_trace(go.Scatter(y=train_scores, name='Train Score'), row=2, col=1)
    fig.update_layout(title='Train vs Test Score on ' + model_label)
    fig.show()

# One figure per benchmarked model, same order as before.
_plot_train_test(lr_test_score, lr_trn_score, 'Linear Regression')
_plot_train_test(abr_test_score, abr_trn_score, 'AdaBoostRegressor')
_plot_train_test(gbr_test_score, gbr_trn_score, 'GradientBoostingRegressor')
_plot_train_test(xgb_test_score, xgb_trn_score, 'XGBRegressor')
_plot_train_test(cbr_test_score, cbr_trn_score, 'CatBoostRegressor')
_plot_train_test(rfr_test_score, rfr_trn_score, 'RandomForestRegressor')
# Same benchmark, now targeting Y2 (cooling load).
X = df.drop(['Y1','Y2'],axis=1)
Y = df['Y2']
# Reset the per-model train/test R² accumulators.
lr_trn_score,rfr_trn_score,abr_trn_score,gbr_trn_score,xgb_trn_score,cbr_trn_score = [],[],[],[],[],[]
lr_test_score,rfr_test_score,abr_test_score,gbr_test_score,xgb_test_score,cbr_test_score = [],[],[],[],[],[]
# Benchmark six regressors on Y2 across 1000 random 80/20 splits,
# recording train and test R² per split.
for i in tqdm(range(1000)):
    x_train, x_test, y_train, y_test = train_test_split(X,Y,test_size=0.2)
    lr = LinearRegression().fit(x_train, y_train)
    pred = lr.predict(x_test)
    pred_trn = lr.predict(x_train)
    lr_test_score.append(r2_score(y_test, pred))
    lr_trn_score.append(r2_score(y_train, pred_trn))
    abr = AdaBoostRegressor().fit(x_train,y_train)
    pred = abr.predict(x_test)
    pred_trn = abr.predict(x_train)
    abr_test_score.append(r2_score(y_test, pred))
    abr_trn_score.append(r2_score(y_train, pred_trn))
    gbr = GradientBoostingRegressor().fit(x_train,y_train)
    pred = gbr.predict(x_test)
    pred_trn = gbr.predict(x_train)
    gbr_test_score.append(r2_score(y_test, pred))
    gbr_trn_score.append(r2_score(y_train, pred_trn))
    xgb = XGBRegressor().fit(x_train,y_train)
    pred = xgb.predict(x_test)
    pred_trn = xgb.predict(x_train)
    xgb_test_score.append(r2_score(y_test, pred))
    xgb_trn_score.append(r2_score(y_train, pred_trn))
    cbr = CatBoostRegressor(verbose=0).fit(x_train,y_train)
    pred = cbr.predict(x_test)
    pred_trn = cbr.predict(x_train)
    cbr_test_score.append(r2_score(y_test, pred))
    cbr_trn_score.append(r2_score(y_train, pred_trn))
    rfr = RandomForestRegressor().fit(x_train, y_train)
    pred = rfr.predict(x_test)
    # BUG FIX: this was lr.predict(x_train) — the random forest's train
    # score was actually LinearRegression's.
    pred_trn = rfr.predict(x_train)
    rfr_test_score.append(r2_score(y_test, pred))
    rfr_trn_score.append(r2_score(y_train, pred_trn))
100%|██████████| 1000/1000 [14:05<00:00, 1.18it/s]
def _plot_train_test(test_scores, train_scores, model_label):
    """Plot per-split test R² (top panel) and train R² (bottom panel)."""
    fig = make_subplots(rows=2, cols=1)
    fig.append_trace(go.Scatter(y=test_scores, name='Test Score'), row=1, col=1)
    fig.append_trace(go.Scatter(y=train_scores, name='Train Score'), row=2, col=1)
    fig.update_layout(title='Train vs Test Score on ' + model_label)
    fig.show()

# One figure per benchmarked model, same order as before.
_plot_train_test(lr_test_score, lr_trn_score, 'Linear Regression')
_plot_train_test(abr_test_score, abr_trn_score, 'AdaBoostRegressor')
_plot_train_test(gbr_test_score, gbr_trn_score, 'GradientBoostingRegressor')
_plot_train_test(xgb_test_score, xgb_trn_score, 'XGBRegressor')
_plot_train_test(cbr_test_score, cbr_trn_score, 'CatBoostRegressor')
_plot_train_test(rfr_test_score, rfr_trn_score, 'RandomForestRegressor')
# Predict Y2 using every remaining column EXCEPT Y2 — note this keeps Y1
# as a feature (target leakage if Y1 is not available at prediction time).
X = df.drop(['Y2'],axis=1)
Y = df['Y2']
x_train, x_test, y_train, y_test = train_test_split(X,Y,test_size=0.2)
cbr = CatBoostRegressor(verbose=0, n_estimators=10000,early_stopping_rounds=100).fit(x_train,y_train)
pred = cbr.predict(x_test)
pred_trn = cbr.predict(x_train)
print(r2_score(y_train, pred_trn), r2_score(y_test, pred))
0.9944150509001013 0.9829129866383302
visulaize_performance_of_the_model(pred, y_test, 'CatBoost regressor')
# Inspect the high-cooling-load subset (Y2 > 25).
temp_df = df.loc[df['Y2'] > 25]
temp_df
| X2 | X3 | X4 | X5 | X7 | X8 | Y1 | Y2 | |
|---|---|---|---|---|---|---|---|---|
| 4 | 563.5 | 318.5 | 122.5 | 7.0 | 0.0 | 0 | 20.84 | 28.28 |
| 5 | 563.5 | 318.5 | 122.5 | 7.0 | 0.0 | 0 | 21.46 | 25.38 |
| 6 | 563.5 | 318.5 | 122.5 | 7.0 | 0.0 | 0 | 20.71 | 25.16 |
| 7 | 563.5 | 318.5 | 122.5 | 7.0 | 0.0 | 0 | 19.68 | 29.60 |
| 8 | 588.0 | 294.0 | 147.0 | 7.0 | 0.0 | 0 | 19.50 | 27.30 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 739 | 637.0 | 343.0 | 147.0 | 7.0 | 0.4 | 1 | 40.79 | 44.87 |
| 740 | 661.5 | 416.5 | 122.5 | 7.0 | 0.4 | 1 | 38.82 | 39.37 |
| 741 | 661.5 | 416.5 | 122.5 | 7.0 | 0.4 | 1 | 39.72 | 39.80 |
| 742 | 661.5 | 416.5 | 122.5 | 7.0 | 0.4 | 1 | 39.31 | 37.79 |
| 743 | 661.5 | 416.5 | 122.5 | 7.0 | 0.4 | 1 | 39.86 | 38.18 |
368 rows × 8 columns
# Almost all high-Y2 rows have the binarized X8 flag set (354 of 368).
temp_df['X8'].value_counts()
1 354 0 14 Name: X8, dtype: int64
# The 14 high-Y2 rows where X8 == 0.
temp_df.loc[temp_df['X8'] == 0]
| X2 | X3 | X4 | X5 | X7 | X8 | Y1 | Y2 | |
|---|---|---|---|---|---|---|---|---|
| 4 | 563.5 | 318.5 | 122.5 | 7.0 | 0.0 | 0 | 20.84 | 28.28 |
| 5 | 563.5 | 318.5 | 122.5 | 7.0 | 0.0 | 0 | 21.46 | 25.38 |
| 6 | 563.5 | 318.5 | 122.5 | 7.0 | 0.0 | 0 | 20.71 | 25.16 |
| 7 | 563.5 | 318.5 | 122.5 | 7.0 | 0.0 | 0 | 19.68 | 29.60 |
| 8 | 588.0 | 294.0 | 147.0 | 7.0 | 0.0 | 0 | 19.50 | 27.30 |
| 11 | 588.0 | 294.0 | 147.0 | 7.0 | 0.0 | 0 | 18.31 | 27.87 |
| 16 | 637.0 | 343.0 | 147.0 | 7.0 | 0.0 | 0 | 28.52 | 37.73 |
| 17 | 637.0 | 343.0 | 147.0 | 7.0 | 0.0 | 0 | 29.90 | 31.27 |
| 18 | 637.0 | 343.0 | 147.0 | 7.0 | 0.0 | 0 | 29.63 | 30.93 |
| 19 | 637.0 | 343.0 | 147.0 | 7.0 | 0.0 | 0 | 28.75 | 39.44 |
| 20 | 661.5 | 416.5 | 122.5 | 7.0 | 0.0 | 0 | 24.77 | 29.79 |
| 21 | 661.5 | 416.5 | 122.5 | 7.0 | 0.0 | 0 | 23.93 | 29.68 |
| 22 | 661.5 | 416.5 | 122.5 | 7.0 | 0.0 | 0 | 24.77 | 29.79 |
| 23 | 661.5 | 416.5 | 122.5 | 7.0 | 0.0 | 0 | 23.93 | 29.40 |
# Reload the raw dataset to start a fresh experiment from unmodified data.
df = pd.DataFrame(pd.read_excel('C:\\Users\\harip\\INEURON_PROJECTS\\Energy Efficiency\\energy+efficiency\\ENB2012_data.xlsx'))
df
| X1 | X2 | X3 | X4 | X5 | X6 | X7 | X8 | Y1 | Y2 | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.98 | 514.5 | 294.0 | 110.25 | 7.0 | 2 | 0.0 | 0 | 15.55 | 21.33 |
| 1 | 0.98 | 514.5 | 294.0 | 110.25 | 7.0 | 3 | 0.0 | 0 | 15.55 | 21.33 |
| 2 | 0.98 | 514.5 | 294.0 | 110.25 | 7.0 | 4 | 0.0 | 0 | 15.55 | 21.33 |
| 3 | 0.98 | 514.5 | 294.0 | 110.25 | 7.0 | 5 | 0.0 | 0 | 15.55 | 21.33 |
| 4 | 0.90 | 563.5 | 318.5 | 122.50 | 7.0 | 2 | 0.0 | 0 | 20.84 | 28.28 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 763 | 0.64 | 784.0 | 343.0 | 220.50 | 3.5 | 5 | 0.4 | 5 | 17.88 | 21.40 |
| 764 | 0.62 | 808.5 | 367.5 | 220.50 | 3.5 | 2 | 0.4 | 5 | 16.54 | 16.88 |
| 765 | 0.62 | 808.5 | 367.5 | 220.50 | 3.5 | 3 | 0.4 | 5 | 16.44 | 17.11 |
| 766 | 0.62 | 808.5 | 367.5 | 220.50 | 3.5 | 4 | 0.4 | 5 | 16.48 | 16.61 |
| 767 | 0.62 | 808.5 | 367.5 | 220.50 | 3.5 | 5 | 0.4 | 5 | 16.64 | 16.03 |
768 rows × 10 columns
# Drop X1 and X6 in one step; this time X8 keeps its original 0-5 levels.
df.drop(['X1','X6'],axis=1,inplace=True)
df
| X2 | X3 | X4 | X5 | X7 | X8 | Y1 | Y2 | |
|---|---|---|---|---|---|---|---|---|
| 0 | 514.5 | 294.0 | 110.25 | 7.0 | 0.0 | 0 | 15.55 | 21.33 |
| 1 | 514.5 | 294.0 | 110.25 | 7.0 | 0.0 | 0 | 15.55 | 21.33 |
| 2 | 514.5 | 294.0 | 110.25 | 7.0 | 0.0 | 0 | 15.55 | 21.33 |
| 3 | 514.5 | 294.0 | 110.25 | 7.0 | 0.0 | 0 | 15.55 | 21.33 |
| 4 | 563.5 | 318.5 | 122.50 | 7.0 | 0.0 | 0 | 20.84 | 28.28 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 763 | 784.0 | 343.0 | 220.50 | 3.5 | 0.4 | 5 | 17.88 | 21.40 |
| 764 | 808.5 | 367.5 | 220.50 | 3.5 | 0.4 | 5 | 16.54 | 16.88 |
| 765 | 808.5 | 367.5 | 220.50 | 3.5 | 0.4 | 5 | 16.44 | 17.11 |
| 766 | 808.5 | 367.5 | 220.50 | 3.5 | 0.4 | 5 | 16.48 | 16.61 |
| 767 | 808.5 | 367.5 | 220.50 | 3.5 | 0.4 | 5 | 16.64 | 16.03 |
768 rows × 8 columns
# Y2 from features only (no targets as inputs), with X8 as raw categories.
X = df.drop(['Y1','Y2'],axis=1)
Y = df['Y2']
x_train, x_test, y_train, y_test = train_test_split(X,Y,test_size=0.2)
cbr = CatBoostRegressor(verbose=0, n_estimators=10000,early_stopping_rounds=100).fit(x_train,y_train)
pred = cbr.predict(x_test)
pred_trn = cbr.predict(x_train)
print(r2_score(y_train, pred_trn), r2_score(y_test, pred))
0.977063447328585 0.9401598996120876
visulaize_performance_of_the_model(pred, y_test, 'CatBoost regressor')
# Y2 with Y1 included as a feature (target leakage caveat applies),
# X8 as raw categories.
X = df.drop(['Y2'],axis=1)
Y = df['Y2']
x_train, x_test, y_train, y_test = train_test_split(X,Y,test_size=0.2)
cbr = CatBoostRegressor(verbose=0, n_estimators=10000,early_stopping_rounds=100).fit(x_train,y_train)
pred = cbr.predict(x_test)
pred_trn = cbr.predict(x_train)
print(r2_score(y_train, pred_trn), r2_score(y_test, pred))
0.9961402649937874 0.9825615016523755
visulaize_performance_of_the_model(pred, y_test, 'CatBoost regressor')
# Variant without X8 entirely.
temp_df = df.drop(['X8'],axis=1)
temp_df
| X2 | X3 | X4 | X5 | X7 | Y1 | Y2 | |
|---|---|---|---|---|---|---|---|
| 0 | 514.5 | 294.0 | 110.25 | 7.0 | 0.0 | 15.55 | 21.33 |
| 1 | 514.5 | 294.0 | 110.25 | 7.0 | 0.0 | 15.55 | 21.33 |
| 2 | 514.5 | 294.0 | 110.25 | 7.0 | 0.0 | 15.55 | 21.33 |
| 3 | 514.5 | 294.0 | 110.25 | 7.0 | 0.0 | 15.55 | 21.33 |
| 4 | 563.5 | 318.5 | 122.50 | 7.0 | 0.0 | 20.84 | 28.28 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 763 | 784.0 | 343.0 | 220.50 | 3.5 | 0.4 | 17.88 | 21.40 |
| 764 | 808.5 | 367.5 | 220.50 | 3.5 | 0.4 | 16.54 | 16.88 |
| 765 | 808.5 | 367.5 | 220.50 | 3.5 | 0.4 | 16.44 | 17.11 |
| 766 | 808.5 | 367.5 | 220.50 | 3.5 | 0.4 | 16.48 | 16.61 |
| 767 | 808.5 | 367.5 | 220.50 | 3.5 | 0.4 | 16.64 | 16.03 |
768 rows × 7 columns
# Y2 with Y1 as a feature (leakage caveat), X8 removed.
X = temp_df.drop(['Y2'],axis=1)
Y = temp_df['Y2']
x_train, x_test, y_train, y_test = train_test_split(X,Y,test_size=0.2)
cbr = CatBoostRegressor(verbose=0, n_estimators=10000,early_stopping_rounds=100).fit(x_train,y_train)
pred = cbr.predict(x_test)
pred_trn = cbr.predict(x_train)
print(r2_score(y_train, pred_trn), r2_score(y_test, pred))
0.9939310855310725 0.9892019517369207
visulaize_performance_of_the_model(pred, y_test, 'CatBoost regressor')
# Y2 from features only (no Y1), X8 removed.
X = temp_df.drop(['Y1','Y2'],axis=1)
Y = temp_df['Y2']
x_train, x_test, y_train, y_test = train_test_split(X,Y,test_size=0.2)
cbr = CatBoostRegressor(verbose=0, n_estimators=10000,early_stopping_rounds=100).fit(x_train,y_train)
pred = cbr.predict(x_test)
pred_trn = cbr.predict(x_train)
print(r2_score(y_train, pred_trn), r2_score(y_test, pred))
0.9710728626727535 0.9747745043013276
visulaize_performance_of_the_model(pred, y_test, 'CatBoost regressor')
# Reload the raw data and build a working copy without X1/X6.
df = pd.DataFrame(pd.read_excel('C:\\Users\\harip\\INEURON_PROJECTS\\Energy Efficiency\\energy+efficiency\\ENB2012_data.xlsx'))
temp_df = df.drop(['X1','X6'],axis=1)
temp_df
| X2 | X3 | X4 | X5 | X7 | X8 | Y1 | Y2 | |
|---|---|---|---|---|---|---|---|---|
| 0 | 514.5 | 294.0 | 110.25 | 7.0 | 0.0 | 0 | 15.55 | 21.33 |
| 1 | 514.5 | 294.0 | 110.25 | 7.0 | 0.0 | 0 | 15.55 | 21.33 |
| 2 | 514.5 | 294.0 | 110.25 | 7.0 | 0.0 | 0 | 15.55 | 21.33 |
| 3 | 514.5 | 294.0 | 110.25 | 7.0 | 0.0 | 0 | 15.55 | 21.33 |
| 4 | 563.5 | 318.5 | 122.50 | 7.0 | 0.0 | 0 | 20.84 | 28.28 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 763 | 784.0 | 343.0 | 220.50 | 3.5 | 0.4 | 5 | 17.88 | 21.40 |
| 764 | 808.5 | 367.5 | 220.50 | 3.5 | 0.4 | 5 | 16.54 | 16.88 |
| 765 | 808.5 | 367.5 | 220.50 | 3.5 | 0.4 | 5 | 16.44 | 17.11 |
| 766 | 808.5 | 367.5 | 220.50 | 3.5 | 0.4 | 5 | 16.48 | 16.61 |
| 767 | 808.5 | 367.5 | 220.50 | 3.5 | 0.4 | 5 | 16.64 | 16.03 |
768 rows × 8 columns
# Binarize X8 in the working copy (0 stays 0; categories 1-5 become 1).
temp_df.loc[(temp_df['X8'] > 0), 'X8']=1
temp_df
| X2 | X3 | X4 | X5 | X7 | X8 | Y1 | Y2 | |
|---|---|---|---|---|---|---|---|---|
| 0 | 514.5 | 294.0 | 110.25 | 7.0 | 0.0 | 0 | 15.55 | 21.33 |
| 1 | 514.5 | 294.0 | 110.25 | 7.0 | 0.0 | 0 | 15.55 | 21.33 |
| 2 | 514.5 | 294.0 | 110.25 | 7.0 | 0.0 | 0 | 15.55 | 21.33 |
| 3 | 514.5 | 294.0 | 110.25 | 7.0 | 0.0 | 0 | 15.55 | 21.33 |
| 4 | 563.5 | 318.5 | 122.50 | 7.0 | 0.0 | 0 | 20.84 | 28.28 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 763 | 784.0 | 343.0 | 220.50 | 3.5 | 0.4 | 1 | 17.88 | 21.40 |
| 764 | 808.5 | 367.5 | 220.50 | 3.5 | 0.4 | 1 | 16.54 | 16.88 |
| 765 | 808.5 | 367.5 | 220.50 | 3.5 | 0.4 | 1 | 16.44 | 17.11 |
| 766 | 808.5 | 367.5 | 220.50 | 3.5 | 0.4 | 1 | 16.48 | 16.61 |
| 767 | 808.5 | 367.5 | 220.50 | 3.5 | 0.4 | 1 | 16.64 | 16.03 |
768 rows × 8 columns
# Y2 with Y1 as a feature (leakage caveat), binarized X8.
X = temp_df.drop(['Y2'],axis=1)
Y = temp_df['Y2']
x_train, x_test, y_train, y_test = train_test_split(X,Y,test_size=0.2)
cbr = CatBoostRegressor(verbose=0, n_estimators=10000,early_stopping_rounds=100).fit(x_train,y_train)
pred = cbr.predict(x_test)
pred_trn = cbr.predict(x_train)
print(r2_score(y_train, pred_trn), r2_score(y_test, pred))
0.9957190858593685 0.9721345451651117
visulaize_performance_of_the_model(pred, y_test, 'CatBoost regressor')
# Y1 from features only, binarized X8 — best holdout R² of the variants tried.
X = temp_df.drop(['Y1','Y2'],axis=1)
Y = temp_df['Y1']
x_train, x_test, y_train, y_test = train_test_split(X,Y,test_size=0.2)
cbr = CatBoostRegressor(verbose=0, n_estimators=10000,early_stopping_rounds=100).fit(x_train,y_train)
pred = cbr.predict(x_test)
pred_trn = cbr.predict(x_train)
print(r2_score(y_train, pred_trn), r2_score(y_test, pred))
0.9980566490700827 0.9974422531623282
visulaize_performance_of_the_model(pred, y_test, 'CatBoost regressor')
# Fit on ALL rows and evaluate on the same rows (split deliberately
# commented out) — the R² below is in-sample and therefore optimistic.
df = pd.DataFrame(pd.read_excel('C:\\Users\\harip\\INEURON_PROJECTS\\Energy Efficiency\\energy+efficiency\\ENB2012_data.xlsx'))
X = df.drop(['Y1','Y2'],axis=1)
Y = df['Y1']
#x_train, x_test, y_train, y_test = train_test_split(X,Y,test_size=0.2)
cbr = CatBoostRegressor(verbose=0, n_estimators=10000,early_stopping_rounds=100).fit(X, Y)
pred = cbr.predict(X)
print(r2_score(Y, pred))
visulaize_performance_of_the_model(pred, Y, 'CatBoost regressor')
0.9999073199670971
# In-sample fit for Y2 with Y1 kept as a feature (leakage caveat) —
# no holdout, so the printed R² is optimistic.
df = pd.DataFrame(pd.read_excel('C:\\Users\\harip\\INEURON_PROJECTS\\Energy Efficiency\\energy+efficiency\\ENB2012_data.xlsx'))
X = df.drop(['Y2'],axis=1)
Y = df['Y2']
#x_train, x_test, y_train, y_test = train_test_split(X,Y,test_size=0.2)
cbr = CatBoostRegressor(verbose=0, n_estimators=10000,early_stopping_rounds=100).fit(X, Y)
pred = cbr.predict(X)
print(r2_score(Y, pred))
visulaize_performance_of_the_model(pred, Y, 'CatBoost regressor')
0.9992043779559775
# In-sample fit for Y1 with X1/X6 dropped — no holdout; R² is optimistic.
df = pd.DataFrame(pd.read_excel('C:\\Users\\harip\\INEURON_PROJECTS\\Energy Efficiency\\energy+efficiency\\ENB2012_data.xlsx'))
X = df.drop(['X1','X6','Y1','Y2'],axis=1)
Y = df['Y1']
#x_train, x_test, y_train, y_test = train_test_split(X,Y,test_size=0.2)
cbr = CatBoostRegressor(verbose=0, n_estimators=10000,early_stopping_rounds=100).fit(X, Y)
pred = cbr.predict(X)
print(r2_score(Y, pred))
visulaize_performance_of_the_model(pred, Y, 'CatBoost regressor')
0.9987571086021994
# In-sample fit for Y2 with X1/X6 dropped and Y1 kept as a feature
# (leakage caveat) — no holdout; R² is optimistic.
df = pd.DataFrame(pd.read_excel('C:\\Users\\harip\\INEURON_PROJECTS\\Energy Efficiency\\energy+efficiency\\ENB2012_data.xlsx'))
X = df.drop(['X1','X6','Y2'],axis=1)
Y = df['Y2']
#x_train, x_test, y_train, y_test = train_test_split(X,Y,test_size=0.2)
cbr = CatBoostRegressor(verbose=0, n_estimators=10000,early_stopping_rounds=100).fit(X, Y)
pred = cbr.predict(X)
print(r2_score(Y, pred))
visulaize_performance_of_the_model(pred, Y, 'CatBoost regressor')
0.9962330440522129
# Compare nine regressors over 1000 random 80/20 resplits, recording train
# and test R^2 per split to estimate score stability. Both targets (Y1, Y2)
# are predicted jointly; single-output estimators are wrapped in
# MultiOutputRegressor.
# read_excel already returns a DataFrame; the extra wrapper was redundant.
df = pd.read_excel('C:\\Users\\harip\\INEURON_PROJECTS\\Energy Efficiency\\energy+efficiency\\ENB2012_data.xlsx')
X = df.drop(['Y1', 'Y2'], axis=1)
Y = df.drop(['X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8'], axis=1)  # just Y1, Y2
lr_trn_score,rfr_trn_score,sgd_trn_score,en_trn_score,abr_trn_score,gbr_trn_score,svr_trn_score,xgb_trn_score,cbr_trn_score = [],[],[],[],[],[],[],[],[]
lr_test_score,rfr_test_score,sgd_test_score,en_test_score,abr_test_score,gbr_test_score,svr_test_score,xgb_test_score,cbr_test_score = [],[],[],[],[],[],[],[],[]

def _fit_and_score(model, x_train, y_train, x_test, y_test, trn_scores, test_scores):
    """Fit *model*, append its train/test R^2 to the given lists, and return
    the (test, train) prediction arrays."""
    model.fit(x_train, y_train)
    p_test = model.predict(x_test)
    p_trn = model.predict(x_train)
    test_scores.append(r2_score(y_test, p_test))
    trn_scores.append(r2_score(y_train, p_trn))
    return p_test, p_trn

for i in tqdm(range(1000)):
    x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)
    # Fresh estimator instances every split, as in the original cell.
    candidates = [
        (LinearRegression(), lr_trn_score, lr_test_score),
        (MultiOutputRegressor(SGDRegressor()), sgd_trn_score, sgd_test_score),
        (ElasticNet(), en_trn_score, en_test_score),
        (MultiOutputRegressor(AdaBoostRegressor()), abr_trn_score, abr_test_score),
        (MultiOutputRegressor(GradientBoostingRegressor()), gbr_trn_score, gbr_test_score),
        (MultiOutputRegressor(SVR()), svr_trn_score, svr_test_score),
        (MultiOutputRegressor(XGBRegressor()), xgb_trn_score, xgb_test_score),
        (MultiOutputRegressor(CatBoostRegressor(verbose=0)), cbr_trn_score, cbr_test_score),
        # BUG FIX: the original scored RandomForest's train split with the
        # LinearRegression predictions (copy-paste error: `lr.predict(x_train)`
        # instead of `rfr.predict(x_train)`). The helper now always scores a
        # model with its own predictions.
        (RandomForestRegressor(), rfr_trn_score, rfr_test_score),
    ]
    for model, trn_scores, test_scores in candidates:
        # `pred`/`pred_trn` are intentionally left bound to the LAST model's
        # predictions (RandomForest) after the loop, matching the original
        # cell, because later cells read them.
        pred, pred_trn = _fit_and_score(model, x_train, y_train, x_test, y_test,
                                        trn_scores, test_scores)
100%|██████████| 1000/1000 [34:04<00:00, 2.04s/it]
# Per-split CatBoost R^2: test scores on the top subplot, train scores below.
fig = make_subplots(rows=2, cols=1)
# `append_trace` is deprecated in plotly; `add_trace` is the supported API.
fig.add_trace(go.Scatter(y=cbr_test_score, name='Test Score'), row=1, col=1)
fig.add_trace(go.Scatter(y=cbr_trn_score, name='Train Score'), row=2, col=1)
fig.update_layout(title='Train vs Test Score on CatBoostRegressor')
fig.show()
# Mean R^2 across the 1000 resplits, reported as percentages.
print("Train Accuracy :", np.mean(cbr_trn_score) * 100)
print("Test Accuracy :", np.mean(cbr_test_score) * 100)
Train Accuracy : 99.94105122035963 Test Accuracy : 99.68085255927099
# Test-set predictions left over from the comparison loop above — these come
# from the LAST model fitted there (RandomForestRegressor on the final
# resplit). NOTE(review): later cells label these as CatBoost output;
# confirm which model was intended.
pred
array([[36.2183 , 39.1086 ],
[12.1999 , 14.9466 ],
[36.5728 , 37.2743 ],
[14.6368 , 17.0479 ],
[24.2178 , 25.9339 ],
[32.2173 , 33.1127 ],
[36.1482 , 36.2495 ],
[25.3945 , 26.8092 ],
[14.2866 , 15.0643 ],
[29.2729 , 30.569 ],
[14.8952 , 15.576 ],
[12.9644 , 15.6852 ],
[26.2406 , 28.0722 ],
[42.0551 , 42.334 ],
[40.4267 , 39.8254 ],
[36.6571 , 36.9522 ],
[28.5724 , 31.5279 ],
[14.4487 , 16.7673 ],
[14.4393 , 17.0646 ],
[23.9168 , 25.5309 ],
[32.2052 , 35.2163 ],
[12.3751 , 15.3781 ],
[39.3341 , 43.0809 ],
[29.4039 , 29.5464 ],
[14.2608 , 17.0458 ],
[29.0632 , 31.4638 ],
[12.8261 , 15.9376 ],
[36.468 , 36.9184 ],
[12.9689 , 15.8419 ],
[26.6294 , 29.3116 ],
[10.3552 , 13.6203 ],
[11.5032 , 14.1564 ],
[29.1492 , 31.2337 ],
[16.7129 , 20.1717 ],
[32.5967 , 33.9185 ],
[41.8504 , 41.3938 ],
[29.3278 , 30.5144 ],
[19.9712 , 25.3508 ],
[11.3147 , 13.9994 ],
[25.8034 , 29.9774 ],
[17.0463 , 17.2059 ],
[26.4909 , 27.1038 ],
[12.9759 , 15.7958 ],
[36.3722 , 39.7724 ],
[12.4379 , 15.2596 ],
[32.3041 , 32.8554 ],
[19.4 , 22.6602 ],
[14.5053 , 16.7478 ],
[36.3896 , 37.273 ],
[26.0264 , 29.3768 ],
[28.7723 , 29.4696 ],
[13.9855 , 16.055 ],
[18.8343 , 21.8517 ],
[11.1023 , 14.1537 ],
[12.6922 , 14.2257 ],
[35.959 , 36.6293 ],
[ 6.39752, 11.4227 ],
[12.3571 , 14.9618 ],
[15.1152 , 18.169 ],
[28.9095 , 30.4197 ],
[29.5598 , 31.1404 ],
[36.6148 , 37.0465 ],
[ 7.1943 , 12.3373 ],
[28.8042 , 30.7659 ],
[12.7798 , 14.0862 ],
[12.9721 , 15.9404 ],
[15.1621 , 19.3701 ],
[11.1977 , 14.1137 ],
[14.9104 , 15.68 ],
[16.5147 , 16.6574 ],
[32.5907 , 32.8658 ],
[14.5381 , 17.0505 ],
[15.0914 , 18.1621 ],
[12.4165 , 15.1511 ],
[25.8077 , 29.6147 ],
[14.4955 , 17.2472 ],
[29.3345 , 29.7631 ],
[35.2853 , 37.8541 ],
[10.3908 , 13.609 ],
[25.3517 , 26.5098 ],
[12.2314 , 15.2061 ],
[32.2517 , 33.359 ],
[11.576 , 14.1769 ],
[14.8518 , 15.6279 ],
[25.7311 , 30.3504 ],
[32.7338 , 34.2769 ],
[24.2097 , 29.6783 ],
[15.1874 , 19.247 ],
[10.4127 , 13.6406 ],
[11.1059 , 14.154 ],
[ 7.2058 , 12.3342 ],
[36.3363 , 36.8157 ],
[32.4119 , 33.8526 ],
[25.4973 , 27.6092 ],
[11.5408 , 13.7549 ],
[39.6898 , 40.22 ],
[17.1522 , 17.2261 ],
[35.6129 , 37.2382 ],
[18.8684 , 22.0105 ],
[23.877 , 25.8006 ],
[15.2372 , 17.7978 ],
[26.2085 , 28.2379 ],
[24.6007 , 26.4439 ],
[32.7914 , 34.3675 ],
[11.217 , 14.3025 ],
[32.4735 , 34.2448 ],
[16.6723 , 16.1324 ],
[11.2405 , 14.3703 ],
[26.254 , 28.2222 ],
[15.3025 , 19.3156 ],
[32.6635 , 33.2931 ],
[14.4026 , 14.9734 ],
[11.1536 , 14.3173 ],
[29.2478 , 31.0659 ],
[12.7292 , 14.2678 ],
[13.0975 , 15.614 ],
[28.7251 , 32.0108 ],
[29.2731 , 30.9188 ],
[12.4526 , 15.2772 ],
[16.8303 , 24.1076 ],
[12.8109 , 16.0854 ],
[15.166 , 19.3458 ],
[29.4558 , 30.9142 ],
[19.4516 , 24.8333 ],
[12.8066 , 14.1722 ],
[29.1944 , 30.9232 ],
[29.086 , 29.7849 ],
[16.9344 , 20.5372 ],
[15.2522 , 19.2634 ],
[28.7461 , 31.4444 ],
[11.8923 , 14.6332 ],
[16.4508 , 16.9661 ],
[ 8.6474 , 12.2 ],
[14.4671 , 15.3598 ],
[31.9033 , 34.6088 ],
[10.7261 , 14.048 ],
[32.2099 , 34.0358 ],
[32.3904 , 33.1941 ],
[12.3671 , 15.2376 ],
[ 6.40532, 11.5887 ],
[16.4114 , 17.0544 ],
[32.5872 , 34.1049 ],
[15.1811 , 17.6493 ],
[10.4097 , 13.5978 ],
[24.268 , 26.0036 ],
[32.576 , 33.2337 ],
[39.3769 , 40.4777 ],
[14.37 , 17.0741 ],
[14.0379 , 16.1653 ],
[28.6452 , 33.3159 ],
[12.6679 , 15.6556 ],
[29.2666 , 30.0071 ],
[29.649 , 28.7731 ],
[28.1403 , 33.8339 ]])
# Held-out (Y1, Y2) targets from the final resplit; the index still carries
# the original dataframe row labels.
y_test
| Y1 | Y2 | |
|---|---|---|
| 112 | 35.65 | 41.07 |
| 414 | 12.10 | 15.57 |
| 256 | 37.03 | 34.99 |
| 561 | 14.70 | 17.00 |
| 194 | 24.04 | 26.18 |
| ... | ... | ... |
| 199 | 29.79 | 29.92 |
| 466 | 12.67 | 15.83 |
| 148 | 28.07 | 34.14 |
| 393 | 29.40 | 32.93 |
| 151 | 29.05 | 29.67 |
154 rows × 2 columns
# Re-index the held-out targets to 0..n-1 so they align positionally with
# the prediction array wrapped into a DataFrame below.
test_values = pd.DataFrame(y_test).reset_index(drop=True)
test_values
| Y1 | Y2 | |
|---|---|---|
| 0 | 35.65 | 41.07 |
| 1 | 12.10 | 15.57 |
| 2 | 37.03 | 34.99 |
| 3 | 14.70 | 17.00 |
| 4 | 24.04 | 26.18 |
| ... | ... | ... |
| 149 | 29.79 | 29.92 |
| 150 | 12.67 | 15.83 |
| 151 | 28.07 | 34.14 |
| 152 | 29.40 | 32.93 |
| 153 | 29.05 | 29.67 |
154 rows × 2 columns
# Wrap the raw prediction array in a labelled DataFrame.
# NOTE(review): `pred` is the output of the last model fitted in the
# comparison loop (RandomForestRegressor), yet downstream plots call it
# "CatBoost" — confirm which model these predictions belong to.
result = pd.DataFrame(pred, columns = ['Predicted Y1', 'Predicted Y2'])
result
| Predicted Y1 | Predicted Y2 | |
|---|---|---|
| 0 | 36.2183 | 39.1086 |
| 1 | 12.1999 | 14.9466 |
| 2 | 36.5728 | 37.2743 |
| 3 | 14.6368 | 17.0479 |
| 4 | 24.2178 | 25.9339 |
| ... | ... | ... |
| 149 | 28.6452 | 33.3159 |
| 150 | 12.6679 | 15.6556 |
| 151 | 29.2666 | 30.0071 |
| 152 | 29.6490 | 28.7731 |
| 153 | 28.1403 | 33.8339 |
154 rows × 2 columns
# Side-by-side actual vs. predicted heating load (Y1). Both frames share
# the same clean 0..n-1 index, so a column-wise concat reproduces the
# index-aligned merge exactly.
final_y1 = pd.concat([test_values['Y1'], result['Predicted Y1']], axis=1)
final_y1
| Y1 | Predicted Y1 | |
|---|---|---|
| 0 | 35.65 | 36.2183 |
| 1 | 12.10 | 12.1999 |
| 2 | 37.03 | 36.5728 |
| 3 | 14.70 | 14.6368 |
| 4 | 24.04 | 24.2178 |
| ... | ... | ... |
| 149 | 29.79 | 28.6452 |
| 150 | 12.67 | 12.6679 |
| 151 | 28.07 | 29.2666 |
| 152 | 29.40 | 29.6490 |
| 153 | 29.05 | 28.1403 |
154 rows × 2 columns
# Side-by-side actual vs. predicted cooling load (Y2), aligned on the
# shared 0..n-1 index — equivalent to the index-on-index merge.
final_y2 = pd.concat([test_values['Y2'], result['Predicted Y2']], axis=1)
final_y2
| Y2 | Predicted Y2 | |
|---|---|---|
| 0 | 41.07 | 39.1086 |
| 1 | 15.57 | 14.9466 |
| 2 | 34.99 | 37.2743 |
| 3 | 17.00 | 17.0479 |
| 4 | 26.18 | 25.9339 |
| ... | ... | ... |
| 149 | 29.92 | 33.3159 |
| 150 | 15.83 | 15.6556 |
| 151 | 34.14 | 30.0071 |
| 152 | 32.93 | 28.7731 |
| 153 | 29.67 | 33.8339 |
154 rows × 2 columns
# Every other call in this notebook passes (predictions, actuals); the
# original here had them swapped, which mislabels the plot's axes.
visulaize_performance_of_the_model(final_y2['Predicted Y2'], final_y2['Y2'], 'CatBoost regressor')
# Stability check: 1000 random 80/20 resplits with a default CatBoost
# predicting cooling load (Y2), collecting train/test R^2 per split.
# read_excel already returns a DataFrame; the extra wrapper was redundant.
df = pd.read_excel('C:\\Users\\harip\\INEURON_PROJECTS\\Energy Efficiency\\energy+efficiency\\ENB2012_data.xlsx')
# Only Y2 is dropped, so the other target Y1 stays in as a feature —
# NOTE(review): confirm this is intentional (it leaks target information
# if Y1 would not be available at prediction time).
X = df.drop(['Y2'], axis=1)
Y = df['Y2']
cbr_trn_score = []
cbr_test_score = []
for i in tqdm(range(1000)):
    x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)
    cbr = CatBoostRegressor(verbose=0).fit(x_train, y_train)
    pred = cbr.predict(x_test)
    pred_trn = cbr.predict(x_train)
    cbr_test_score.append(r2_score(y_test, pred))
    cbr_trn_score.append(r2_score(y_train, pred_trn))
100%|██████████| 1000/1000 [15:17<00:00, 1.09it/s]
# Per-split R^2 for the Y2 stability run: test on top, train below.
fig = make_subplots(rows=2, cols=1)
# `append_trace` is deprecated in plotly; `add_trace` is the supported API.
fig.add_trace(go.Scatter(y=cbr_test_score, name='Test Score'), row=1, col=1)
fig.add_trace(go.Scatter(y=cbr_trn_score, name='Train Score'), row=2, col=1)
fig.update_layout(title='Train vs Test Score on CatBoostRegressor')
fig.show()
# NOTE(review): this visualises only the FINAL iteration's predictions and
# test split, not an aggregate over the 1000 resplits.
visulaize_performance_of_the_model(pred, y_test, 'CatBoost regressor')